######################################################
# CreateCovariates.R
# This script codes the covariates specified in the pre-analysis plan
# and implements the pre-specified imputation for covariates (for missing data on covariates)
# note this file requires the working directory to be set to "./Merge and Clean Data/"
# Contact Ryan Jablonski, r.s.jablonski@lse.ac.uk with questions
#
# Log
# Created 2018
# Edited for APSR replication 17 August 2023 by Ryan Jablonski
######################################################


# Clear every object from the workspace before any input is loaded.
rm(list = ls())

# Survey files (MP and councillor versions) read from ./output.
mp.survey.all <- read.csv("./output/mp_all_withtests.csv", stringsAsFactors = FALSE)
mp.survey     <- read.csv("./output/mp_withtests.csv", stringsAsFactors = FALSE)
c.survey.all  <- read.csv("./output/c_all_withtests.csv", stringsAsFactors = FALSE)
c.survey      <- read.csv("./output/c_withtests.csv", stringsAsFactors = FALSE)

# 2014 election results by school, for councillor (lc) and MP (mp) races.
elecresults.lc <- read.csv("./input/2014 councillor election results by school.csv", stringsAsFactors = FALSE)
elecresults.mp <- read.csv("./input/2014 mp election results by school.csv", stringsAsFactors = FALSE)

# School-level files with population and poverty columns.
schooldata.lc <- read.csv("./output/Schools.forLC.withpoppoverty.csv", stringsAsFactors = FALSE)
schooldata.mp <- read.csv("./output/Schools.forMP.withpoppoverty.csv", stringsAsFactors = FALSE)

# MP election results for all polling stations; needed so that overall
# vote shares can be calculated.
psdata.mp <- read.csv("./input/2014 mp election results by ps.csv", stringsAsFactors = FALSE)


# Ward- and constituency-level population estimates, merged in below.
# Data come from the WorldPop project:
# https://hub.worldpop.org/geodata/summary?id=123
popdata.lc <- read.csv("./input/worldpop_ward.csv", stringsAsFactors = FALSE)
popdata.mp <- read.csv("./input/worldpop_const.csv", stringsAsFactors = FALSE)

# Attach the WorldPop mean, grid-cell count and sum to each survey frame as
# wp_mean, wp_area and wp_sum. Councillor frames are matched to the ward file
# on ps_ward_id; MP frames are matched to the constituency file on
# constituency_id (survey) / constituencyid (WorldPop). Unmatched rows get NA.
c.survey.all[c("wp_mean", "wp_area", "wp_sum")] <-
  popdata.lc[match(c.survey.all$ps_ward_id, popdata.lc$ps_ward_id),
             c("world_pop_mean", "world_pop_grid_cell_count", "world_pop_sum")]

c.survey[c("wp_mean", "wp_area", "wp_sum")] <-
  popdata.lc[match(c.survey$ps_ward_id, popdata.lc$ps_ward_id),
             c("world_pop_mean", "world_pop_grid_cell_count", "world_pop_sum")]

mp.survey.all[c("wp_mean", "wp_area", "wp_sum")] <-
  popdata.mp[match(mp.survey.all$constituency_id, popdata.mp$constituencyid),
             c("world_pop_mean", "world_pop_grid_cell_count", "world_pop_sum")]

mp.survey[c("wp_mean", "wp_area", "wp_sum")] <-
  popdata.mp[match(mp.survey$constituency_id, popdata.mp$constituencyid),
             c("world_pop_mean", "world_pop_grid_cell_count", "world_pop_sum")]



# Calculate the leading opposition candidate in each constituency and that
# candidate's support at every polling station.
#
# Candidate vote counts live in the columns votes1 ... votes14 of psdata.mp.
# The column names are built once here instead of being typed out repeatedly
# or assembled with eval(parse()).
mp.vote.cols <- paste0("votes", 1:14)
# Names the aggregated columns carry in psdata.mp.ag (kept as
# "psdata.mp.votes<k>" so the table has the same layout as before).
mp.vote.cols.ag <- paste0("psdata.mp.", mp.vote.cols)

# Constituency-level vote totals for each candidate column (NAs ignored).
psdata.mp.ag <- aggregate(
  setNames(psdata.mp[, mp.vote.cols], mp.vote.cols.ag),
  by = list(psdata.mp$constituency),
  FUN = sum,
  na.rm = TRUE
)

# order() ranks ascending, so within each constituency the second-to-last
# entry is the candidate column with the second-largest total. That column
# index is what is treated as the "opposition" here.
# NOTE(review): this presumes the top vote-getter is the incumbent MP - confirm.
ps.order <- apply(psdata.mp.ag[, mp.vote.cols.ag], 1, order)
psdata.mp.ag$opposition <- ps.order[length(mp.vote.cols) - 1, ]
psdata.mp$opposition <- psdata.mp.ag[match(psdata.mp$constituency, psdata.mp.ag$Group.1), "opposition"]

# For every polling station pick the votes from the column whose index equals
# the constituency's opposition index.
psdata.mp$ps_opposition_votes <- NA
for (i in seq_along(mp.vote.cols)) {
  psdata.mp$ps_opposition_votes <- ifelse(
    psdata.mp$opposition == i,
    psdata.mp[[mp.vote.cols[i]]],
    psdata.mp$ps_opposition_votes
  )
}

# Opposition share of the station's total votes, and the gap between the
# mp_share column and that opposition share.
psdata.mp$ps_opposition_percent <- psdata.mp$ps_opposition_votes / psdata.mp$total_votes
psdata.mp$ps_victory_margin <- psdata.mp$mp_share - psdata.mp$ps_opposition_percent



# In both councillor frames, total_votes_local is taken from the
# total_votes_local2 column.
c.survey.all$total_votes_local <- c.survey.all$total_votes_local2
c.survey$total_votes_local     <- c.survey$total_votes_local2

# Variable-label table, seeded with a single all-NA row; one row per covariate
# is appended to it with rbind() inside the covariate loop further down.
labels <- data.frame(
  varname = NA,
  label = NA,
  label_description = NA,
  include = NA,
  alt.specific = NA,
  stringsAsFactors = FALSE
)

# Row-bind two data frames, keeping only the columns present in both.
#
# Args:
#   input1, input2: data frames.
#
# Returns:
#   A data frame holding the rows of input1 followed by the rows of input2,
#   restricted to the shared columns. Column order follows input2 when input2
#   has fewer columns than input1, and input1 otherwise.
rbind.match.columns <- function(input1, input2) {
  if (ncol(input2) < ncol(input1)) {
    column.names <- names(input2)[names(input2) %in% names(input1)]
  } else {
    column.names <- names(input1)[names(input1) %in% names(input2)]
  }

  # drop = FALSE keeps a data frame even when exactly one column is shared;
  # without it the single column collapses to a bare vector and the column
  # name is lost.
  rbind(
    input1[, column.names, drop = FALSE],
    input2[, column.names, drop = FALSE]
  )
}



# Fill missing values of one column using group means, from the narrowest
# group to the widest.
#
# A missing value is replaced by the mean of the non-missing values that share
# its map_id; if that is still missing, by the mean within its ps_ward_id,
# then its constituencyid, then its district_id, and finally by the mean over
# the whole data frame. All means are computed from the originally observed
# values, not from values filled in at an earlier step.
#
# Args:
#   varname: name of the column to impute (character, matched exactly).
#   dfname:  name of a data frame visible from the calling environment
#            (character), or the data frame itself. The data frame must
#            contain varname, map_id, ps_ward_id, constituencyid and
#            district_id.
#
# Returns:
#   The column as a vector with missing values filled in. An entry stays
#   missing only when the column has no observed value at all.
#
# Side effect: prints the number of missing values found in the column.
impute_var <- function(varname, dfname) {
  # Look the frame up by name in the caller's environment rather than via
  # eval(parse()); a data frame passed directly is used as is.
  df.temp <- if (is.data.frame(dfname)) {
    dfname
  } else {
    get(dfname, envir = parent.frame())
  }

  group.cols <- c("map_id", "ps_ward_id", "constituencyid", "district_id")
  missing.cols <- setdiff(c(varname, group.cols), names(df.temp))
  if (length(missing.cols) > 0) {
    stop(
      "impute_var: column(s) not found: ",
      paste(missing.cols, collapse = ", "),
      call. = FALSE
    )
  }

  # [[ ]] matches the column name exactly; `$` would silently accept a unique
  # prefix of another column's name and impute the wrong variable.
  observed <- df.temp[[varname]]

  print(paste("imputed", sum(is.na(observed)), "observations for", varname))

  # Nothing to aggregate in an empty frame.
  if (length(observed) == 0) {
    return(observed)
  }

  # Mean of the observed values within each group, aligned to the rows.
  # Groups with no observed value yield NaN, which is.na() treats as missing,
  # so those rows fall through to the next level.
  group_mean <- function(groups) {
    means <- aggregate(
      data.frame(group.mean = observed),
      by = list(groups),
      FUN = mean,
      na.rm = TRUE
    )
    means[match(groups, means$Group.1), "group.mean"]
  }

  fallbacks <- lapply(group.cols, function(col) group_mean(df.temp[[col]]))
  overall.mean <- mean(observed, na.rm = TRUE)

  filled <- observed
  for (candidate in fallbacks) {
    filled <- ifelse(is.na(filled), candidate, filled)
  }
  ifelse(is.na(filled), overall.mean, filled)
}

for(this.dfname in c("mp.survey", "c.survey", "mp.survey.all", "c.survey.all")){
  
  this.df=eval(parse(text=this.dfname))
  this.df$attritted=1
  this.df[!is.na(this.df$instanceID),]$attritted=0
  
  #Population of community
  this.df$log_pop=log(this.df$wp_sum)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_pop","Log Population", "Log Constituency/Ward Population (WorldPop)", 1, 0))}
  this.df$log_pop=impute_var( "log_pop","this.df")
  
  
  #Size of the constituency/ward
  this.df$log_area=log(this.df$wp_area)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_area", "Log Area", "Log Constituency/Ward Area in Square Km (WorldPop)", 1, 0))}
  this.df$log_area=impute_var( "log_area","this.df")
  
  #Number of students
  this.df$log_number_of_students=log(this.df$student_to_classroom_ratio*(this.df$school_classrooms_permanent+this.df$school_classrooms_temporary)+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_number_of_students", "Log Enrollment", "Log Number of Students in School +1 (Malawi Dept of Education)", 1, 1))}
  this.df$log_number_of_students=impute_var( "log_number_of_students","this.df")
  this.df$number_of_students=(this.df$student_to_classroom_ratio*(this.df$school_classrooms_permanent+this.df$school_classrooms_temporary)+1)
  
  #Number of teachers
  this.df$log_number_of_teachers=log(this.df$school_num_teachers+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_number_of_teachers", "Log Teachers", "Log Number of Teachers in School +1 (Malawi Dept of Education)", 1,1))}
  this.df$log_number_of_teachers=impute_var( "log_number_of_teachers","this.df")
  
  #Whether a councillor's or family member's children attend school
  
  this.df$children_attend_school_Yes=NA
  this.df$children_attend_school_No=NA
  this.df$children_attend_school_DK=NA
  this.df$children_attend_school_Yes=ifelse(this.df$aid_53=="Yes" | this.df$aid_54=="Yes"| nchar(this.df$aid_53_1)>0 | nchar(this.df$aid_54_1)>0, 1, 0)
  this.df$children_attend_school_No=ifelse(this.df$aid_53=="No" & this.df$aid_54=="No", 1, 0)
  this.df$children_attend_school_DK=ifelse(this.df$children_attend_school_No==0 & this.df$children_attend_school_Yes==0, 1, 0)
  
  this.df$children_attend_school_Yes=ifelse(this.df$aid_53=="" & this.df$aid_54=="", NA, this.df$children_attend_school_Yes)
  this.df$children_attend_school_No=ifelse(this.df$aid_53=="" & this.df$aid_54=="", NA, this.df$children_attend_school_No)
  this.df$children_attend_school_DK=ifelse(this.df$aid_53=="" & this.df$aid_54=="", NA, this.df$children_attend_school_DK)
  
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("children_attend_school_Yes", "ChildrenAttend=Yes", "Whether incumbent's or family member's children attend school in the constituency=Yes  (survey)", 1, 0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("children_attend_school_No",  "ChildrenAttend=No", "Whether incumbent's or family member's children attend school in the constituency=No  (survey)", 1, 0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("children_attend_school_DK",  "ChildrenAttend=Don't Know", "Whether incumbent's or family member's children attend school in the constituency=Don't Know  (survey)", 1, 0))}
  
  
  
  #Whether a councillor's or family member's children attend school specifically
  this.df$children_attend_school=ifelse(is.na(this.df$children_attend_school) & this.df$attritted==0, 0, this.df$children_attend_school)
  this.df$family_children_attend_school=ifelse(is.na(this.df$family_children_attend_school) & this.df$attritted==0, 0, this.df$family_children_attend_school)
  
  this.df$any_children_attend_school=NA
  this.df$any_children_attend_school= ifelse(this.df$family_children_attend_school==0 | this.df$children_attend_school==0, 0, this.df$any_children_attend_school)
  this.df$any_children_attend_school= ifelse(this.df$family_children_attend_school==1 | this.df$children_attend_school==1, 1, this.df$any_children_attend_school)
  
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("children_attend_school",  "Incumbent's Children Attends School", "Whether incumbent's children attends this school (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("family_children_attend_school",  "Incumbent's Relatives Attend School", "Whether incumbent's family member's children attends this school (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("any_children_attend_school",  "Family Attends School", "Whether incumbent's children or family member's children attends this school (survey)", 1,0))}
  
  this.df$understandmaps=ifelse(this.df$aid_22=="School B", 1, 0)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("understandmaps",  "Incumbent Understood Maps", "Whether incumbent correctly indicated a response in a test map (survey)", 1,0))}
  
  
  # Number of temporary classrooms
  this.df$log_school_classrooms_temporary=log(this.df$school_classrooms_temporary+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_school_classrooms_temporary", "Log Temporary Classrooms", "Log Number of Temporary Classrooms in School +1 (Malawi Dept of Education)", 1,1))}
  this.df$log_school_classrooms_temporary=impute_var( "log_school_classrooms_temporary","this.df")
  
  # Number of permanent classrooms
  this.df$log_school_classrooms_permanent=log(this.df$school_classrooms_permanent+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_school_classrooms_permanent", "Log Permanent Classrooms", "Log Number of Permanent Classrooms in School +1 (Malawi Dept of Education)", 1,1))}
  this.df$log_school_classrooms_permanent=impute_var( "log_school_classrooms_permanent","this.df")
  
  # Number of temporary houses for teachers
  this.df$log_school_teacher_houses_temporary=log(this.df$school_teacher_houses_temporary+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_school_teacher_houses_temporary", "Log Temporary Houses", "Log Number of Temporary Teacher Houses in School +1 (Malawi Dept of Education)", 1,1))}
  this.df$log_school_teacher_houses_temporary=impute_var( "log_school_teacher_houses_temporary","this.df")
  
  # Number of permanent houses for teachers
  this.df$log_school_teacher_houses_permanent=log(this.df$school_teacher_houses_permanent+1)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_school_teacher_houses_permanent", "Log Permanent Houses", "Log Number of Permanent Teacher Houses in School +1 (Malawi Dept of Education)", 1,1))}
  this.df$log_school_teacher_houses_permanent=impute_var( "log_school_teacher_houses_permanent","this.df")
  
  # Type of good provided
  this.df$good_type_dictionary=ifelse(this.df$good_type=="dictionary", 1, 0)
  this.df$good_type_dictionary=ifelse(this.df$attritted==1, NA, this.df$good_type_dictionary)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("good_type_dictionary", "Choice=Dictionary", "Allocation decision on this map was about dictionaries (survey)", 0,0))}
  
  this.df$good_type_bags=ifelse(this.df$good_type=="bags", 1, 0)
  this.df$good_type_bags=ifelse(this.df$attritted==1, NA, this.df$good_type_bags)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("good_type_bags",  "Choice=Teacher Bags", "Allocation decision on this map was about teacher bags (survey)", 0,0))}
  
  this.df$good_type_lamps=ifelse(this.df$good_type=="lamps", 1, 0)
  this.df$good_type_lamps=ifelse(this.df$attritted==1, NA, this.df$good_type_lamps)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("good_type_lamps", "Choice=Solar Lamps", "Allocation decision on this map was about solar lamps (survey)", 0,0))}
  
  
  # Support for leading opposition candidate in ward election
  this.df$ps_opposition_votes_lc = schooldata.lc[match(this.df$school_id, schooldata.lc$school_id), "ward_runnerup_votes"]
  this.df$ps_opposition_percent_lc = this.df$ps_opposition_votes_lc/this.df$total_votes_local
  this.df$ps_opposition_votes_lc=impute_var( "ps_opposition_votes_lc","this.df")
  this.df$ps_opposition_percent_lc=impute_var( "ps_opposition_percent_lc","this.df")
  
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ps_opposition_votes_lc", "Opposition Votes (LC)", "Votes at Polling Station for Leading Opposition Candidate in Councillor Election (Malawi Electoral Commission)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ps_opposition_percent_lc", "Opposition Percent (LC)", "Percent Votes at Polling Station for Leading Opposition Candidate in Councillor Election (Malawi Electoral Commission)", 1,1))}
  
  # Support for leading opposition candidate in MP election
  this.df$ps_opposition_votes_mp = psdata.mp[match(this.df$pollingstationno, psdata.mp$station), "ps_opposition_votes"]
  this.df$ps_opposition_percent_mp = psdata.mp[match(this.df$pollingstationno, psdata.mp$station), "ps_opposition_percent"]
  this.df$ps_victory_margin_mp = psdata.mp[match(this.df$pollingstationno, psdata.mp$station), "ps_victory_margin"]
  
  this.df$ps_opposition_votes_mp=impute_var( "ps_opposition_votes_mp","this.df")
  this.df$ps_opposition_percent_mp=impute_var( "ps_opposition_percent_mp","this.df")
  this.df$ps_victory_margin_mp=impute_var( "ps_victory_margin_mp","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ps_opposition_votes_mp","Opposition Votes (MP)", "Votes at Polling Station for Leading Opposition Candidate in MP Election (Malawi Electoral Commission)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ps_opposition_percent_mp","Percent Votes (MP)", "Percent Votes at Polling Station for Leading Opposition Candidate in MP Election (Malawi Electoral Commission)", 1,1))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ps_victory_margin_mp", "Victory Margin (MP)", "Victory Margin at Polling Station for incumbent MP  (Malawi Electoral Commission)", 1,0))}
  
  
  if(this.dfname %in% c("c.survey.all", "c.survey")){
    this.df$ps_opposition_percent=this.df$ps_opposition_percent_lc
  }
  if(this.dfname %in% c("mp.survey.all", "mp.survey")){
    this.df$ps_opposition_percent=this.df$ps_opposition_percent_mp
  }
  
  
  
  
  #pop density at school
  this.df$pop_per_hectacre = schooldata.lc[match(this.df$school_id, schooldata.lc$school_id), "pop_per_hectacre"]
  this.df$pop_per_hectacre_imp=impute_var( "pop_per_hectacre","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("pop_per_hectacre", "Pop Density at School", "Population per Hectacre (World Pop Project)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("pop_per_hectacre_imp", "Pop Density at School", "Population per Hectacre (World Pop Project)", 1,1))}
  
  
  
  #poverty proportion at school
  if(this.dfname %in% c("c.survey.all", "c.survey")){
    this.df$poverty_proportion = schooldata.lc[match(this.df$school_id, schooldata.lc$school_id), "poverty_proportion"]
    this.df$poverty_proportion_upper = this.df$poverty_proportion+schooldata.lc[match(this.df$school_id, schooldata.lc$school_id), "poverty_proportion_unc"]
    this.df$poverty_proportion_lower = this.df$poverty_proportion-schooldata.lc[match(this.df$school_id, schooldata.lc$school_id), "poverty_proportion_unc"]
  }
  
  if(this.dfname %in% c("mp.survey.all", "mp.survey")){
    this.df$poverty_proportion = schooldata.mp[match(this.df$school_id, schooldata.mp$school_id), "poverty_proportion"]
    this.df$poverty_proportion_upper = this.df$poverty_proportion+schooldata.mp[match(this.df$school_id, schooldata.mp$school_id), "poverty_proportion_unc"]
    this.df$poverty_proportion_lower = this.df$poverty_proportion-schooldata.mp[match(this.df$school_id, schooldata.mp$school_id), "poverty_proportion_unc"]
  }
  
  this.df$poverty_proportion_imp=impute_var( "poverty_proportion","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("poverty_proportion", "Poverty at School", "Proportion of Area in Poverty (World Pop Project)", 1,1))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("poverty_proportion_imp", "Poverty at School", "Proportion of Area in Poverty (World Pop Project)", 1,1))}
  
  
  this.df$ps_total_votes=NA
  this.df$log_ps_total_votes=NA
  
  if(this.dfname %in% c("c.survey.all","c.survey")){this.df$ps_total_votes= this.df$total_votes_local}
  if(this.dfname %in% c("mp.survey.all","mp.survey")){this.df$ps_total_votes= this.df$total_votes_parliamentary}
  if(this.dfname %in% c("c.survey.all")){
    labels=rbind(labels, c("ps_total_votes","Turnout", "Turnout at Polling Station", 0,1))
    labels=rbind(labels, c("log_ps_total_votes","Log Votes", "Log Votes at Polling Station", 1,1))}
  this.df$log_ps_total_votes=log(this.df$ps_total_votes+1)
  
  
  # Measures of level of political connection with the MP

  # Councillor gender
  this.df$councilor_gender=ifelse(this.df$gender=="Male", 1, NA)
  this.df$councilor_gender=ifelse(this.df$gender=="Female", 0, this.df$councilor_gender)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("councilor_gender", "Gender", "Gender of respondent, male=1 and female=0 (survey)", 1,0))}
  
  # Status of ward education plan
  this.df$education_planYes=ifelse(this.df$education_plan!="" & !is.na(this.df$education_plan), 0, NA)
  this.df$education_planYes=ifelse(this.df$education_plan=="Yes", 1, this.df$education_planYes)
  this.df$education_planNo=ifelse(this.df$education_plan!="" & !is.na(this.df$education_plan), 0, NA)
  this.df$education_planNo=ifelse(this.df$education_plan=="No", 1, this.df$education_planNo)
  this.df$education_planDK=ifelse(this.df$education_plan!="" & !is.na(this.df$education_plan), 0, NA)
  this.df$education_planDK=ifelse(this.df$education_plan=="Don?<U+0080><U+0099>t know", 1, this.df$education_planDK)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("education_planYes","Education Plan=Yes", "Incumbent's council has an education plan=Yes (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("education_planNo", "Education Plan=No", "Incumbent's council has an education plan=No (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("education_planDK", "Education Plan=Don't Know", "Incumbent's council has an education plan=Don't Know (survey)", 1,0))}
  
  #Tribe of councillor/mp
  
  xtabs(~this.df$tribe)
  
  this.df$tribeChewa=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeLomwe=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeNgoni=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeOther=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeSena=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeTumbuka=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  this.df$tribeYao=ifelse(this.df$tribe!="" & !is.na(this.df$tribe), 0, NA)
  
  this.df$tribeChewa=ifelse(this.df$tribe=="Chewa", 1, this.df$tribeChewa)
  this.df$tribeLomwe=ifelse(this.df$tribe=="Lomwe", 1, this.df$tribeLomwe)
  this.df$tribeNgoni=ifelse(this.df$tribe=="Ngoni", 1, this.df$tribeNgoni)
  this.df$tribeOther=ifelse(this.df$tribe=="Other", 1, this.df$tribeOther)
  this.df$tribeSena=ifelse(this.df$tribe=="Sena", 1, this.df$tribeSena)
  this.df$tribeTumbuka=ifelse(this.df$tribe=="Tumbuka", 1, this.df$tribeTumbuka)
  this.df$tribeYao=ifelse(this.df$tribe=="Yao", 1, this.df$tribeYao)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeChewa","IncumbentTribe=Chewa", "Incumbent is from Chewa tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeLomwe","IncumbentTribe=Lomwe", "Incumbent is from Lomwe tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeNgoni","IncumbentTribe=Ngoni", "Incumbent is from Ngoni tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeOther","IncumbentTribe=Other", "Incumbent is from Other tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeSena","IncumbentTribe=Sena", "Incumbent is from Sena tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeTumbuka","IncumbentTribe=Tumbuka", "Incumbent is from Tumbuka tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeYao","IncumbentTribe=Yao", "Incumbent is from Yao tribe (survey)", 1,0))}
  
  # Predominate tribe of ward
  xtabs(~this.df$tribe_of_constituency)
  
  this.df$tribe_of_constituencyChewa=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencyLomwe=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencyNgoni=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencyOther=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencySena=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencyTumbuka=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  this.df$tribe_of_constituencyYao=ifelse(this.df$tribe_of_constituency!="" & !is.na(this.df$tribe_of_constituency), 0, NA)
  
  this.df$tribe_of_constituencyChewa=ifelse(this.df$tribe_of_constituency=="Chewa", 1, this.df$tribe_of_constituencyChewa)
  this.df$tribe_of_constituencyLomwe=ifelse(this.df$tribe_of_constituency=="Lomwe", 1, this.df$tribe_of_constituencyLomwe)
  this.df$tribe_of_constituencyNgoni=ifelse(this.df$tribe_of_constituency=="Ngoni", 1, this.df$tribe_of_constituencyNgoni)
  this.df$tribe_of_constituencyOther=ifelse(this.df$tribe_of_constituency=="Other", 1, this.df$tribe_of_constituencyOther)
  this.df$tribe_of_constituencySena=ifelse(this.df$tribe_of_constituency=="Sena", 1, this.df$tribe_of_constituencySena)
  this.df$tribe_of_constituencyTumbuka=ifelse(this.df$tribe_of_constituency=="Tumbuka", 1, this.df$tribe_of_constituencyTumbuka)
  this.df$tribe_of_constituencyYao=ifelse(this.df$tribe_of_constituency=="Yao", 1, this.df$tribe_of_constituencyYao)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeChewa", "ConstituencyTribe=Chewa", "Constituency is predominately from Chewa tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeLomwe", "ConstituencyTribe=Lomwe","Constituency is predominately from Lomwe tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeNgoni", "ConstituencyTribe=Ngoni","Constituency is predominately from Ngoni tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeOther", "ConstituencyTribe=Other","Constituency is predominately from Other tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeSena", "ConstituencyTribe=Sena","Constituency is predominately from Sena tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeTumbuka", "ConstituencyTribe=Tumbuka","Constituency is predominately from Tumbuka tribe (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("tribeYao", "ConstituencyTribe=Yao","Constituency is predominately from Yao tribe (survey)", 1,0))}
  
  
  # Councillor/mp re-election plans
  xtabs(~this.df$aid_56)
  this.df$contest_electionYes=ifelse(this.df$aid_56!="" & !is.na(this.df$aid_56), 0, NA)
  this.df$contest_electionNo=ifelse(this.df$aid_56=="No", 1, this.df$contest_electionYes)
  this.df$contest_electionDK=ifelse(this.df$aid_56=="Don?<U+0080><U+0099>t know", 1, this.df$contest_electionYes)
  this.df$contest_electionUndecided=ifelse(this.df$aid_56=="Undecided", 1, this.df$contest_electionYes)
  this.df$contest_electionYes=ifelse(this.df$aid_56=="Yes", 1, this.df$contest_electionYes)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("contest_electionYes","Contest=Yes", "Plan to contest election=Yes (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("contest_electionNo","Contest=No", "Plan to contest election=No (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("contest_electionDK","Contest=Don't Know", "Plan to contest election=Don't Know (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("contest_electionUndecided","Contest=Undecided", "Plan to contest election=Undecided (survey)", 1,0))}
  
  # Councillor victory margin
  
  this.df$ward_victory_margin = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"IncumbentVictoryMargin"]
  this.df$ward_incumbent_percent = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"IncumbentPercent"]
  this.df$ward_opposition_percent = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"OppositionPercent"]
  this.df$ward_turnout = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"Turnout"]
  this.df$ward_regvoters = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"RegVoters"]
  this.df$ward_victory_margin=impute_var( "ward_victory_margin","this.df")
  this.df$ward_incumbent_percent=impute_var( "ward_incumbent_percent","this.df")
  this.df$ward_opposition_percent=impute_var( "ward_opposition_percent","this.df")
  this.df$ward_turnout=impute_var( "ward_turnout","this.df")
  this.df$ward_regvoters=impute_var( "ward_regvoters","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_victory_margin", "Victory Margin in Ward", "Victory margin of ward incumbent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_percent", "Incumbent Percent Votes in Ward", "Percent votes for ward incumbent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_opposition_percent", "Opposition Percent Votes in Ward", "Percent votes for leading opposition candidate in ward (Malawi Electoral Commission)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_turnout", "Turnout Percent in Ward", "Turnout % in the ward (Malawi Electoral Commission)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_regvoters", "Registered Voters in Ward", "Registered voters in the ward (Malawi Electoral Commission)", 0,0))}
  
  # MP victory margin
  this.df$const_incumbent_percent = elecresults.mp[match(this.df$constituencyid, elecresults.mp[elecresults.mp$rank==1,]$constituency),"vote_share"]
  this.df$const_opposition_percent = elecresults.mp[match(this.df$constituencyid, elecresults.mp[elecresults.mp$rank==2,]$constituency),"vote_share"]
  this.df$const_victory_margin = this.df$const_incumbent_percent-this.df$const_opposition_percent
  this.df$const_turnout=elecresults.mp[match(this.df$constituencyid, elecresults.mp[elecresults.mp$rank==1,]$constituency),"total"]
  
  this.df$const_victory_margin=impute_var( "const_victory_margin","this.df")
  this.df$const_incumbent_percent=impute_var( "const_incumbent_percent","this.df")
  this.df$const_opposition_percent=impute_var( "const_opposition_percent","this.df")
  this.df$const_turnout=impute_var( "const_turnout","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_victory_margin", "Victory Margin in Constituency", "Victory margin of constituency incumbent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_percent", "Percent Votes in Constituency", "Percent votes for constituency incumbent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_opposition_percent", "Opposition Votes in Constituency", "Percent votes for leading oppositoin candidate in constituency (Malawi Electoral Commission)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_turnout", "Votes in Constituency", "Total votes in the constituency (Malawi Electoral Commission)", 0,0))}
  
  if(this.dfname %in% c("mp.survey.all","mp.survey")){this.df$turnout= this.df$const_turnout}
  if(this.dfname %in% c("c.survey.all","c.survey")){this.df$turnout= this.df$ward_turnout}
  
  
  
  # Predominate party of the ward
  #I don't think we collected this information. Use party of incumbent?  Included below. 
  
  # Education of the councillor/mp
  xtabs(~mp.survey.all$education)  
  this.df$educationCertificate=ifelse(this.df$education!="" & !is.na(this.df$education), 0, NA)
  this.df$educationDegree=ifelse(this.df$education=="Degree", 1, this.df$educationCertificate)
  this.df$educationDiploma=ifelse(this.df$education=="Diploma", 1, this.df$educationCertificate)
  this.df$educationPhD=ifelse(this.df$education=="Ph.D.", 1, this.df$educationCertificate)
  this.df$educationPrimary=ifelse(this.df$education=="Primary School", 1, this.df$educationCertificate)
  this.df$educationSecondary=ifelse(this.df$education=="Secondary School", 1, this.df$educationCertificate)
  this.df$educationCertificate=ifelse(this.df$education=="Certificate", 1, this.df$educationCertificate)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationCertificate", "HighestEd=Certificate", "Incumbent's highest education level=Certificate (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationDegree", "HighestEd=Degree","Incumbent's highest education level=Degree (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationDiploma", "HighestEd=Diploma","Incumbent's highest education level=Diploma (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationPhD", "HighestEd=PhD","Incumbent's highest education level=PhD (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationPrimary", "HighestEd=Primary","Incumbent's highest education level=Primary (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("educationSecondary", "HighestEd=Secondary","Incumbent's highest education level=Secondary (survey)", 1,0))}
  
  # Income of the councillor
  xtabs(~mp.survey$income)  
  this.df$income1=ifelse(this.df$income!="" & !is.na(this.df$income), 0, NA)
  this.df$income2=ifelse(this.df$income=="100,000-200,000 kwacha/month", 1, this.df$income1)
  this.df$income3=ifelse(this.df$income=="200,000-400,000 kwacha/month", 1, this.df$income1)
  this.df$income4=ifelse(this.df$income=="400,000-1,000,000 kwacha/month", 1, this.df$income1)
  this.df$income5=ifelse(this.df$income=="1,000,000-5,000,000 kwacha/month", 1, this.df$income1)
  this.df$income6=ifelse(this.df$income=="Over 5,000,000 kwacha/month", 1, this.df$income1)
  this.df$incomeDecline=ifelse(this.df$income=="Decline to answer", 1, this.df$income1)
  this.df$income1=ifelse(this.df$income=="Under 100,000 kwacha/month", 1, this.df$income1)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income2", "Income1", "Incumbent household income 100,000-200,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income3", "Income2","Incumbent household income 200,000-400,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income4", "Income3","Incumbent household income 400,000-1,000,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income5", "Income4","Incumbent household income 1,000,000-5,000,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income6", "Income5","Over 5,000,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("income1", "Income6","Under 100,000 kwacha/month (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("incomeDecline", "IncomeDeclined","Incumbent declined to declare income (survey)", 1,0))}
  
  # Length of residence in the ward
  this.df$length_of_residence=this.df$lenght_of_residence
  this.df$lenght_of_residence=NULL
  this.df$length_of_residence1=ifelse(this.df$length_of_residence!="" & !is.na(this.df$length_of_residence), 0, NA)
  this.df$length_of_residence2=ifelse(this.df$length_of_residence=="5 to 10 years", 1, this.df$length_of_residence1)
  this.df$length_of_residence3=ifelse(this.df$length_of_residence=="More than 10 years", 1, this.df$length_of_residence1)
  this.df$length_of_residence4=ifelse(this.df$length_of_residence=="All of my life", 1, this.df$length_of_residence1)
  
  this.df$length_of_residenceDK=ifelse(this.df$length_of_residence=="Don?<U+0080><U+0099>t know", 1, this.df$length_of_residence1)
  this.df$length_of_residence1=ifelse(this.df$length_of_residence=="Less than 5 years", 1, this.df$length_of_residence1)
  this.df$length_of_residence_index=NA
  this.df$length_of_residence_index=ifelse(this.df$length_of_residence1==1, 0, this.df$length_of_residence_index)
  this.df$length_of_residence_index=ifelse(this.df$length_of_residence2==1, 1, this.df$length_of_residence_index)
  this.df$length_of_residence_index=ifelse(this.df$length_of_residence3==1, 2, this.df$length_of_residence_index)
  this.df$length_of_residence_index=ifelse(this.df$length_of_residence4==1, 3, this.df$length_of_residence_index)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residence1", "LengthResidence1", "Incumbent resided in constituency less than 5 years (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residence2", "LengthResidence2", "Incumbent resided in constituency 5-10 years (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residence3", "LengthResidence3", "Incumbent resided in constituency more than 10 years (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residence4", "LengthResidence4", "Incumbent resided in constituency all their life (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residence_index", "Length of Residence", "0-3 index of how long incumbent resided in constituency (<5 yrs, 5-10 yrs, >10yrs or entire life) (survey)", 1,0))}
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("length_of_residenceDK", "LengthResidenceDontKnow", "Incumbent doesn't know how long s/he resided in constituency (survey)", 1,0))}
  
  # Councillor age
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("age", "Age", "Incumbent age (survey)", 1,0))}
  
  
  # Marriage status
  xtabs(~mp.survey$aid_15)
  this.df$marriageMarried=ifelse(this.df$aid_15!="" & !is.na(this.df$aid_15), 0, NA)
  
  this.df$marriageDivorced=ifelse(this.df$aid_15=="Divorced", 1, this.df$marriageMarried)
  this.df$marriageMultiple=ifelse(this.df$aid_15=="Married with Multiple Wives", 1, this.df$marriageMarried)
  this.df$marriageSingle=ifelse(this.df$aid_15=="Single", 1, this.df$marriageMarried)
  this.df$marriageWidowed=ifelse(this.df$aid_15=="Widowed", 1, this.df$marriageMarried)
  this.df$marriageDK=ifelse(this.df$aid_15=="Don?<U+0080><U+0099>t know", 1, this.df$marriageMarried)
  this.df$marriageMarried=ifelse(this.df$aid_15=="Married", 1, this.df$marriageMarried)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageMarried", "Married=OneWife", "Incumbent is married with one wife (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageDivorced", "Married=Divorced", "Incumbent is divorced (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageSingle", "Married=Single", "Incumbent is single (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageWidowed", "Married=Widowed", "Incumbent is widowed (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageDK", "Married=DontKnow", "Incumbent doesn't know marriage status (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("marriageMultiple", "Married=Multiple", "Incumbent is married with multiple wives (survey)", 1,0))}
  
  # Current party of the councillor
  xtabs(~c.survey$current_party)
  this.df$current_partyAFORD=ifelse(this.df$current_party!="" & !is.na(this.df$current_party) & this.df$attritted==0, 0, NA)
  this.df$current_partyDPP=ifelse(this.df$current_party=="dpp", 1, this.df$current_partyAFORD)
  this.df$current_partyIndependent=ifelse(this.df$current_party=="independent", 1, this.df$current_partyAFORD)
  this.df$current_partyMCP=ifelse(this.df$current_party=="mcp", 1, this.df$current_partyAFORD)
  this.df$current_partyDeclined=ifelse(this.df$current_party=="no answer" & this.df$attritted==0, 1, this.df$current_partyAFORD)
  this.df$current_partyPP=ifelse(this.df$current_party=="pp", 1, this.df$current_partyAFORD)
  this.df$current_partyUDF=ifelse(this.df$current_party=="udf", 1, this.df$current_partyAFORD)
  this.df$current_partyAFORD=ifelse(this.df$current_party=="aford", 1, this.df$current_partyAFORD)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyAFORD", "VoteAFORD", "Incumbent would vote for AFORD party (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyDPP", "VoteDPP", "Incumbent would vote for DPP party (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyIndependent", "VoteIndependent","Incumbent would vote for Independent party (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyMCP", "VoteMCP","Incumbent would vote for MCP party (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyDeclined", "VoteDeclined","Incumbent declined to declare party vote (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyPP", "VotePP","Incumbent would vote for PP party (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("current_partyUDF", "VoteUDF","Incumbent would vote for UDF party (survey)", 1,0))}
  
  
  # Number of schools
  this.df$school_count = NA
  if(this.dfname=="mp.survey.all"){this.df$school_count=this.df$num_schools_in_constituency_tot}
  if(this.dfname=="mp.survey"){this.df$school_count=this.df$num_schools_in_constituency_tot}
  if(this.dfname=="c.survey"){this.df$school_count=this.df$num_schools_in_ward_tot}
  if(this.dfname=="c.survey.all"){this.df$school_count=this.df$num_schools_in_ward_tot}
  
  this.df$log_school_count=log(this.df$school_count)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("log_school_count", "Log School Count", "Log number of primary schools in ward/constituency (Ministry of Education)", 1,0))}
  this.df$log_school_count=impute_var( "log_school_count","this.df")
  
  
  # Urban/Rural population
  #not available. Using pop density instead
  this.df$pop_density=log(this.df$wp_mean)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("pop_density", "Pop Density", "Average number of persons per grid cell in ward/constituency (WorldPop)", 1,0))}
  this.df$pop_density=impute_var( "pop_density","this.df")
  
  #percent votes incumbent. Local or local2? Use local2
  this.df$winner_percent = NA
  
  if(this.dfname=="mp.survey.all"){this.df$winner_percent=this.df$winner_percent_parliamentary}
  if(this.dfname=="mp.survey"){this.df$winner_percent=this.df$winner_percent_parliamentary}
  if(this.dfname=="c.survey"){this.df$winner_percent=this.df$winner_percent_local2}
  if(this.dfname=="c.survey.all"){this.df$winner_percent=this.df$winner_percent_local2}
  this.df$winner_percent_imp=impute_var( "winner_percent","this.df")
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("winner_percent_imp", "Incumbent Percent", "Percent votes at polling station for incumbent (Malawi Electoral Commission)", 1,0))}
  
  this.df$ps_victory_margin=this.df$winner_percent-this.df$ps_opposition_percent
  
  #number of votes for incumbent.
  this.df$winner_votes = NA
  
  if(this.dfname=="mp.survey.all"){this.df$winner_votes=this.df$winner_votes_parliamentary}
  if(this.dfname=="mp.survey"){this.df$winner_votes=this.df$winner_votes_parliamentary}
  if(this.dfname=="c.survey"){this.df$winner_votes=this.df$winner_votes_local}
  if(this.dfname=="c.survey.all"){this.df$winner_votes=this.df$winner_votes_local}
  this.df$winner_votes_imp=impute_var( "winner_votes","this.df")
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("winner_votes_imp", "Incumbent Votes", "Votes at polling station for incumbent (Malawi Electoral Commission)", 1,0))}
  
  this.df$log_winner_votes=log(this.df$winner_votes+1)
  
  # Elected party of the councillor (this is not explicit in the PAP but implied)
  this.df$ward_incumbent_party = elecresults.lc[match(this.df$ps_ward_id, elecresults.lc$WardId),"IncumbentParty"]

  
  this.df$ward_incumbent_partyAFORD=ifelse(this.df$ward_incumbent_party!="" & !is.na(this.df$ward_incumbent_party), 0, NA)
  this.df$ward_incumbent_partyOther=ifelse(this.df$ward_incumbent_party %in% c("CCP", "NASAF", "UIP"), 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyDPP=ifelse(this.df$ward_incumbent_party=="DPP", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyIndependent=ifelse(this.df$ward_incumbent_party=="Ind" | this.df$ward_incumbent_party=="INDEPENDENT" | this.df$ward_incumbent_party=="Independent", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyMCP=ifelse(this.df$ward_incumbent_party=="MCP", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyPP=ifelse(this.df$ward_incumbent_party=="PP", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyUDF=ifelse(this.df$ward_incumbent_party=="UDF", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyAFORD=ifelse(this.df$ward_incumbent_party=="AFORD", 1, this.df$ward_incumbent_partyAFORD)
  this.df$ward_incumbent_partyAFORD=impute_var( "ward_incumbent_partyAFORD","this.df")
  this.df$ward_incumbent_partyOther=impute_var( "ward_incumbent_partyOther","this.df")
  this.df$ward_incumbent_partyIndependent=impute_var( "ward_incumbent_partyIndependent","this.df")
  this.df$ward_incumbent_partyMCP=impute_var( "ward_incumbent_partyMCP","this.df")
  this.df$ward_incumbent_partyPP=impute_var( "ward_incumbent_partyPP","this.df")
  this.df$ward_incumbent_partyUDF=impute_var( "ward_incumbent_partyUDF","this.df")
  this.df$ward_incumbent_partyDPP=impute_var( "ward_incumbent_partyDPP","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyAFORD", "CouncilorPartyAFORD", "Councilor ran under AFORD party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyOther", "CouncilorPartyOther", "Councilor ran under CCP, NASAF or UIP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyDPP", "CouncilorPartyDPP","Councilor ran under DPP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyIndependent", "CouncilorPartyIndependent","Councilor ran as independent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyMCP", "CouncilorPartyMCP","Councilor ran under MCP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyPP", "CouncilorPartyPP","Councilor ran under PP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("ward_incumbent_partyUDF", "CouncilorPartyUDF","Councilor ran under UDF party (Malawi Electoral Commission)", 1,0))}
  
  # Elected party of the mp (this is not explicit in the PAP but implied)
  this.df$const_incumbent_party = elecresults.mp[match(this.df$constituencyid, elecresults.mp$constituency),"party"]
  xtabs(~this.df$const_incumbent_party)
  
  this.df[is.na(this.df$const_incumbent_party),]$constituencyid
  
  this.df$const_incumbent_partyAFORD=ifelse(this.df$const_incumbent_party!="" & !is.na(this.df$const_incumbent_party), 0, NA)
  this.df$const_incumbent_partyOther=ifelse(this.df$const_incumbent_party %in% c("CCP", "NASAF", "UIP", "MAFUNDE", "NLP", "UDK", "MPP", "PETRA", "NARC","UIP"), 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyDPP=ifelse(this.df$const_incumbent_party=="DPP", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyIndependent=ifelse(this.df$const_incumbent_party=="Ind" | this.df$const_incumbent_party=="INDEPENDENT", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyMCP=ifelse(this.df$const_incumbent_party=="MCP", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyPP=ifelse(this.df$const_incumbent_party=="PP", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyUDF=ifelse(this.df$const_incumbent_party=="UDF", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyPPM=ifelse(this.df$const_incumbent_party=="PPM", 1, this.df$const_incumbent_partyAFORD)
  this.df$const_incumbent_partyAFORD=ifelse(this.df$const_incumbent_party=="AFORD", 1, this.df$const_incumbent_partyAFORD)
  
  this.df$const_incumbent_partyAFORD=impute_var( "const_incumbent_partyAFORD","this.df")
  this.df$const_incumbent_partyOther=impute_var( "const_incumbent_partyOther","this.df")
  this.df$const_incumbent_partyIndependent=impute_var( "const_incumbent_partyIndependent","this.df")
  this.df$const_incumbent_partyMCP=impute_var( "const_incumbent_partyMCP","this.df")
  this.df$const_incumbent_partyPP=impute_var( "const_incumbent_partyPP","this.df")
  this.df$const_incumbent_partyUDF=impute_var( "const_incumbent_partyUDF","this.df")
  this.df$const_incumbent_partyDPP=impute_var( "const_incumbent_partyDPP","this.df")
  this.df$const_incumbent_partyPPM=impute_var( "const_incumbent_partyPPM","this.df")
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyAFORD", "MPPartyAFORD", "MP ran under AFORD party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyOther", "MPPartyOther", "MP ran under CCP, NASAF or UIP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyDPP", "MPPartyDPP", "MP ran under DPP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyIndependent", "MPPartyIndependent", "MP ran as independent (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyMCP", "MPPartyMCP", "MP ran under MCP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyPP", "MPPartyPP", "MP ran under PP party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyUDF", "MPPartyUDF", "MP ran under UDF party (Malawi Electoral Commission)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("const_incumbent_partyPPM", "MPPartyPPM", "MP ran under PPM party (Malawi Electoral Commission)", 1,0))}
  
  #setup treatments
  
  this.df$transparency_radio=ifelse(!is.na(this.df$transparency_condition) & nchar(this.df$transparency_condition)>1, 0,NA)
  this.df$transparency_donor=ifelse(this.df$transparency_condition %in% c("donor_audit", "radio + donor_audit"), 1,this.df$transparency_radio)
  this.df$transparency_all=ifelse(this.df$transparency_condition %in% c("donor_audit", "radio", "radio + donor_audit"), 1,this.df$transparency_radio)
  this.df$transparency_control=ifelse(this.df$transparency_condition %in% c("control"), 1,this.df$transparency_radio)
  this.df$transparency_radio=ifelse(this.df$transparency_condition %in% c("radio", "radio + donor_audit"), 1,this.df$transparency_radio)
  
  this.df$information_aid=ifelse(!is.na(this.df$treatment), 0,NA)  
  this.df$information_votes=ifelse(!is.na(this.df$treatment), 0,NA)  
  this.df$information_need=ifelse(!is.na(this.df$treatment), 0,NA)  
  this.df$information_any=ifelse(!is.na(this.df$treatment), 0,NA)  
  
  this.df$information_aid=ifelse(this.df$treatment %in% c("G", "L", "M", "N", "O"),1,this.df$information_aid)
  this.df$information_need=ifelse(this.df$treatment %in% c("E", "F", "I", "J", "L", "M", "N", "O"),1,this.df$information_need)
  this.df$information_votes=ifelse(this.df$treatment %in% c("D", "I", "J", "K", "N", "O"),1,this.df$information_votes)
  this.df$information_any=ifelse(!(this.df$treatment %in% c("C")),1,this.df$information_any)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("information_aid", "Aid Treatment", "Equals one if a map was assigned the aid information treatment and zero otherwise", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("information_need", "Need Treatment", "Equals one if a map was assigned the school need information treatment and zero otherwise", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("information_votes", "Voting Treatment", "Equals one if a map was assigned the percent votes information treatment and zero otherwise", 0,0))}
  
  
  this.df$school_knowledge = (this.df$test_least_classes+this.df$test_most_enrollment+this.df$test_enrollment_specific)/3
  this.df$political_knowledge = (this.df$test_least_percentvotes+this.df$test_percentvotes_specific)/2
  
  this.df$test_donor_specific=impute_var( "test_donor_specific","this.df")
  this.df$donor_knowledge = (this.df$test_most_projects + this.df$test_donor_specific)/2
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("school_knowledge", "Knowledge of Schools", "Average score in school knowledge questions (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("political_knowledge", "Knowledge of Politics", "Average score in political knowledge questions (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("donor_knowledge", "Knowledge of Donors", "Average score in donor knowledge questions (survey)", 1,0))}
  
  
  this.df$number_aid_categories=this.df$project_capacity_building+this.df$project_community_support+this.df$project_construction+this.df$project_food_provision+this.df$project_gender_issues+this.df$project_health_services+this.df$project_teacher_training
  
  this.df$number_aid_categories=log(this.df$number_aid_categories+1)
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("number_aid_categories", "Aid Good Types", "A count of the number of types of aid projects delivered by donors at this school (donors)", 1,0))}
  
  this.df$donor_frequency=NA
  this.df$donor_frequency=ifelse(this.df$aid_50=="Never", 0, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="Rarely", 1, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="A few times per year", 2, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="Once a month", 3, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="Once every other week", 4, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="Once a week", 5, this.df$donor_frequency)
  this.df$donor_frequency=ifelse(this.df$aid_50=="Daily", 6, this.df$donor_frequency)
  this.df$donor_frequency_imp=impute_var( "donor_frequency","this.df")
  
  this.df$useful=NA
  this.df$useful=ifelse(this.df$aid_61=="Very useful", 2, this.df$useful)
  this.df$useful=ifelse(this.df$aid_61=="Somewhat useful", 1, this.df$useful)
  this.df$useful=ifelse(this.df$aid_61=="Not very useful", 0, this.df$useful)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("useful", "Information Usefulness", "A 0 to 2 scale indicating how useful the information was to the respondent (survey)", 1,0))}
  
  this.df$learn=ifelse(this.df$aid_63=="Yes", 1, NA)
  this.df$learn=ifelse(this.df$aid_63 %in% c("No"), 0, this.df$learn)
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("learn", "Learning from Experiment", "Whether the respondent indicated that they learned something from the experimental interaction (survey)", 1,0))}
  
  
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("donor_frequency_imp", "Frequency of Donor Interaction", "A 0 to five scale indicating how frequently incumbents interact with donors (survey)", 1,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("student_to_teacher_ratio", "Student to Teacher Ratio", "Number of students per teacher in a school (Ministry of Education EMIS Statistics)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("student_to_classroom_ratio", "Student to Classroom Ratio", "Number of students per class in a school (Ministry of Education EMIS Statistics)", 0,0))}
  if(this.dfname=="c.survey.all"){labels=rbind(labels, c("temporary_to_permanent_ratio", "Temporary Classroom Ratio", "Number of temporary to permanent classrooms in a school (Ministry of Education EMIS Statistics)", 0,0))}
  
  
  xtabs(~this.df$aid_50)
  
  this.df$log_winner_percent = log(this.df$winner_percent+1)
  #setup normalized variables
  for(this.name in c("number_of_students", "ps_victory_margin", "log_winner_percent", "ps_opposition_percent", "winner_votes", "pop_per_hectacre", "school_need_index_ward", "school_need_index_constituency", "winner_percent", "winner_percent_imp", "student_to_teacher_ratio", "student_to_classroom_ratio",  "temporary_to_permanent_ratio", "past_aid_project", "number_aid_categories", "log_number_of_students")){
    this.var=eval(parse(text=paste("this.df$", this.name, sep="")))
    normvar=(this.var-mean(this.var, na.rm=TRUE))/sd(this.var, na.rm=TRUE)
    this.df[,paste("z_", this.name, sep="")]=normvar
    
  }
  
  this.df$past_aid_project=log(this.df$past_aid_project+1)
  
  assign(this.dfname, this.df)
  
}

# Labels for variables that are not registered inside the per-survey loop.
# Every row appended to `labels` has five fields: variable name, short label,
# long description, and two 0/1 flags (the meaning of the flags is fixed where
# `labels` is first created, which is outside this section).

# School need indices
labels <- rbind(labels, c("school_need_index_ward", "School Need Index (ward)", "Index of school need within the ward (Ministry of Education)", 1,0))
labels <- rbind(labels, c("school_need_index_constituency", "School Need Index (constituency)", "Index of school need within the constituency (Ministry of Education)", 1,0))
labels <- rbind(labels, c("school_need_index", "School Need Index", "Index of school need within the constituency or ward (Ministry of Education)", 1,0))

# Past aid
labels <- rbind(labels, c("past_aid_project", "Aid Project Count", "Number of aid projects at school (various donors)", 1,0))

# Knowledge test questions
labels <- rbind(labels, c("test_least_classes", "Test Question Classes", "Whether the respondent could correctly identify a school with the least number of permanent classes", 0,0))
labels <- rbind(labels, c("test_least_percentvotes", "Test Question Votes", "Whether the respondent could correctly identify a school with the least percentage of votes for the incumbent", 0,0))
labels <- rbind(labels, c("test_most_enrollment", "Test Question Enrollment", "Whether the respondent could correctly identify a school with the highest number of students", 0,0))
labels <- rbind(labels, c("test_most_projects", "Test Question Projects", "Whether the respondent could correctly identify a school with the most donor projects", 0,0))
labels <- rbind(labels, c("test_enrollment_specific", "Test Question Enrollment Specific", "Whether the respondent could correctly identify the range of enrollment at a chosen school", 0,0))
labels <- rbind(labels, c("test_percentvotes_specific", "Test Question Votes Specific", "Whether the respondent could correctly identify the range of percent votes at a chosen school", 0,0))
labels <- rbind(labels, c("test_donor_specific", "Test Question Aid Projects Specific", "Whether the respondent could correctly identify one or more donors with projects on a map", 0,0))

# Transparency treatments.
# transparency_radio is coded from the "radio" conditions and
# transparency_donor from the "donor_audit" conditions, so each short label
# must name its own treatment (they were previously swapped).
labels <- rbind(labels, c("transparency_radio", "Radio Transparency Treatment", "", 0,0))
labels <- rbind(labels, c("transparency_donor", "Donor Transparency Treatment", "", 0,0))
labels <- rbind(labels, c("transparency_all", "Any Transparency Treatment", "", 0,0))

# Indicator for at least one past aid project
labels <- rbind(labels, c("d_past_aid_project", "Aid Project Count>0", "", 0,0))


# ---- Harmonise the MP and councillor surveys, then stack them ----
# Each data frame gets the same set of generic columns (incumbentselected,
# school_need_index, z_school_need_index, victory_margin) copied from its
# constituency-level (MP) or ward-level (councillor) counterpart.
# MP map ids are multiplied by 1000 and councillor map ids by 1
# (presumably so ids from the two surveys cannot collide once stacked --
# TODO confirm).
# Statements are grouped per data frame; the order in which columns are added
# to each frame is deliberately left as it was so output column order is
# unaffected.

# MP survey (analysis sample)
mp.survey$map_id <- mp.survey$map_id*1000
mp.survey$incumbentselected <- mp.survey$mpselected
mp.survey$school_need_index <- mp.survey$school_need_index_constituency
mp.survey$z_school_need_index <- mp.survey$z_school_need_index_constituency
mp.survey$victory_margin <- mp.survey$const_victory_margin
# survey-type indicators (only added to the two frames that are stacked below)
mp.survey$lc <- 0
mp.survey$mp <- 1

# MP survey (all respondents)
mp.survey.all$map_id <- mp.survey.all$map_id*1000
mp.survey.all$incumbentselected <- mp.survey.all$mpselected
mp.survey.all$school_need_index <- mp.survey.all$school_need_index_constituency
mp.survey.all$z_school_need_index <- mp.survey.all$z_school_need_index_constituency
mp.survey.all$victory_margin <- mp.survey.all$const_victory_margin

# Councillor survey (analysis sample)
c.survey$map_id <- c.survey$map_id*1
c.survey$incumbentselected <- c.survey$councilorselected
c.survey$school_need_index <- c.survey$school_need_index_ward
c.survey$victory_margin <- c.survey$ward_victory_margin
c.survey$z_school_need_index <- c.survey$z_school_need_index_ward
# survey-type indicators
c.survey$lc <- 1
c.survey$mp <- 0

# Councillor survey (all respondents)
c.survey.all$map_id <- c.survey.all$map_id*1
c.survey.all$incumbentselected <- c.survey.all$councilorselected
c.survey.all$school_need_index <- c.survey.all$school_need_index_ward
c.survey.all$victory_margin <- c.survey.all$ward_victory_margin
c.survey.all$z_school_need_index <- c.survey.all$z_school_need_index_ward

# Stack the two analysis samples; rbind.match.columns is defined elsewhere
# (presumably it reconciles differing column sets before binding -- verify).
all.surveys <- rbind.match.columns(mp.survey, c.survey)


# To re-generate the labels file, uncomment:
#write.csv(labels, "./Cleaning/data/labels.csv")

# Write every survey data frame (now carrying the covariates) to ./output/.
# The list maps destination path -> data frame; files are written in the
# order listed.
covariate.outputs <- list(
  "./output/mp_all_withcovariates.csv" = mp.survey.all,
  "./output/mp_withcovariates.csv" = mp.survey,
  "./output/c_all_withcovariates.csv" = c.survey.all,
  "./output/c_withcovariates.csv" = c.survey,
  "./output/all_withcovariates.csv" = all.surveys
)
for (out.path in names(covariate.outputs)) {
  write.csv(covariate.outputs[[out.path]], out.path)
}


